Introduction

This script contains code for several attempts at clustering the HumanPilot brain dataset, using various combinations of top PCs, UMAP components, marker genes, and spatial coordinates.

The reasoning is that performing clustering on a combined set of molecular feature dimensions and spatial dimensions (e.g. concatenating the two spatial dimensions as additional columns with the top PCs) is a simple and intuitive way to combine the molecular and spatial data, which should improve the ability of the clustering algorithms to follow the spatial layer structure.

However, it is not clear how best to balance the molecular dimensions (e.g. top PCs, top UMAP coordinates, top marker genes) with the two spatial dimensions. Questions include:

Code

The following code runs clustering using various combinations of top PCs, UMAP components, marker genes, and spatial coordinates. The main parameters and other design choices are:

suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(uwot))
suppressPackageStartupMessages(library(scran))
suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))

Load data

Load SingleCellExperiment data object containing UMI counts, top 50 PCs, and spatial coordinates.

Note: using data from one sample only for now (sample 151673).

# load scran output file
load("../../data/Human_DLPFC_Visium_processedData_sce_scran.Rdata")
sce
## class: SingleCellExperiment 
## dim: 33538 47681 
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
##   ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
##   gene_biotype
## colnames(47681): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1 ...
##   TTGTTTCCATACAACT-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
##   UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):
# select spots from one sample
ix_151673 <- colData(sce)$sample_name == 151673

sce_151673 <- sce[, ix_151673]
sce_151673
## class: SingleCellExperiment 
## dim: 33538 3639 
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
##   ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
##   gene_biotype
## colnames(3639): AAACAAGTATCTCCCA-1 AAACAATCTACTAGCA-1 ...
##   TTGTTTGTATTACACG-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
##   UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):

Extract features

Extract features from object: top 50 PCs, spatial coordinates, log counts of marker genes detected using SpatialDE (including some for white matter).

# extract top 50 PCs
dims_pcs <- reducedDim(sce_151673, type = "PCA")
stopifnot(nrow(dims_pcs) == ncol(sce_151673))


# extract spatial dimensions
# note: y coordinate is reversed
xy_coords <- data.frame(
    x_coord = colData(sce_151673)[, c("imagecol")], 
    y_coord = -colData(sce_151673)[, c("imagerow")]
)
dims_spatial <- xy_coords
stopifnot(nrow(dims_spatial) == ncol(sce_151673))


# extract log counts of marker genes from SpatialDE
marker_genes_SpatialDE <- c(
    "NEFH", "PCP4", "HPCAL1", "VSNL1", "TMSB10", "CCK", "HOPX", "PPP3CA", "GPM6A", 
    "ENC1", "NEFM", "NEFL", "DIRAS2", "SNCG", "FBXL16", "GFAP", "CAMK2N1", "CXCL14", 
    "TUBB2A", "OLFM1", "MALAT1", "APLP1", 
    "MOBP", "ERMN", "CLDND1", "TF"
)
length(marker_genes_SpatialDE)
## [1] 26
# identify using gene symbols
ix <- match(marker_genes_SpatialDE, rowData(sce_151673)$gene_name)
length(ix)
## [1] 26
marker_genes_SpatialDE_sym <- rowData(sce_151673)$gene_id[ix]
marker_genes_SpatialDE_sym
##  [1] "ENSG00000100285" "ENSG00000183036" "ENSG00000115756" "ENSG00000163032"
##  [5] "ENSG00000034510" "ENSG00000187094" "ENSG00000171476" "ENSG00000138814"
##  [9] "ENSG00000150625" "ENSG00000171617" "ENSG00000104722" "ENSG00000277586"
## [13] "ENSG00000165023" "ENSG00000173267" "ENSG00000127585" "ENSG00000131095"
## [17] "ENSG00000162545" "ENSG00000145824" "ENSG00000137267" "ENSG00000130558"
## [21] "ENSG00000251562" "ENSG00000105290" "ENSG00000168314" "ENSG00000136541"
## [25] "ENSG00000080822" "ENSG00000091513"
dims_markers_SpatialDE <- t(as.matrix(logcounts(sce_151673)[marker_genes_SpatialDE_sym, , drop = FALSE]))

dims_markers_SpatialDE[1:6, 1:6]
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.4655466       0.0000000       0.8169526
## AAACAATCTACTAGCA-1       0.0000000       1.5349570       0.0000000
## AAACACCAATAACTGC-1       0.0000000       0.0000000       0.0000000
## AAACAGAGCGACTCCT-1       0.0000000       0.7453531       1.2343597
## AAACAGCTTTCAGAAG-1       0.0000000       0.0000000       0.0000000
## AAACAGGGTCTATATT-1       0.0000000       1.4310049       1.4310049
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1        2.973635        2.746873       3.3420101
## AAACAATCTACTAGCA-1        3.390804        2.261710       0.0000000
## AAACACCAATAACTGC-1        0.000000        2.940403       0.9662463
## AAACAGAGCGACTCCT-1        2.519716        2.825269       1.2343597
## AAACAGCTTTCAGAAG-1        3.082373        2.949172       2.6390106
## AAACAGGGTCTATATT-1        1.431005        2.606213       2.1351044
dim(dims_markers_SpatialDE)
## [1] 3639   26
stopifnot(nrow(dims_markers_SpatialDE) == ncol(sce_151673))

Run UMAP

Run UMAP: the aim will be to use the top 5-10 UMAP components for clustering.

Note: running UMAP on the top 50 PCs for faster runtime. To do: run on top 1942 highly variable genes instead.

Note: running UMAP on one sample only; could also run on all samples combined.

# keep top 50 UMAP components
set.seed(123)
dims_umap <- umap(dims_pcs, n_components = 50)

stopifnot(nrow(dims_umap) == ncol(sce_151673))

Run UMAP on marker genes

Another alternative: run UMAP on the set of SpatialDE marker genes.

# keep top 10 UMAP components
set.seed(123)
dims_umap_markers_SpatialDE <- umap(dims_markers_SpatialDE, n_components = 10)

stopifnot(nrow(dims_umap_markers_SpatialDE) == ncol(sce_151673))

Scale spatial dimensions

Need all dimensions to be on approximately comparable scales.

A simple way to achieve this is to scale the spatial dimensions to a range comparable to the range of the top UMAPs or PCs. However the results will be highly sensitive to the exact choice of range.

Note: do not use z-scaling for UMAP, PCs, or spatial dimensions (this would scale up less meaningful dimensions for UMAP or PCs; and doesn’t make sense for physical spatial coordinates). However, marker gene log counts need to be z-scaled.

# check range of UMAP dimensions
summary(dims_umap[, 1:5])
##        V1                V2                   V3                 V4          
##  Min.   :-5.9683   Min.   :-0.4662351   Min.   :-2.43302   Min.   :-0.57427  
##  1st Qu.:-0.1451   1st Qu.:-0.2600787   1st Qu.:-1.31960   1st Qu.:-0.23484  
##  Median : 0.9647   Median :-0.0001477   Median :-0.71714   Median : 0.02704  
##  Mean   : 0.0000   Mean   : 0.0000000   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 1.6725   3rd Qu.: 0.2405742   3rd Qu.: 0.01786   3rd Qu.: 0.15711  
##  Max.   : 2.5713   Max.   : 0.5380302   Max.   : 5.11824   Max.   : 0.72873  
##        V5          
##  Min.   :-0.54490  
##  1st Qu.:-0.25571  
##  Median :-0.09029  
##  Mean   : 0.00000  
##  3rd Qu.: 0.07080  
##  Max.   : 1.04638
mean(dims_umap[, 1])
## [1] 3.743539e-17
sd(dims_umap[, 1])
## [1] 2.433886
max(abs(dims_umap))
## [1] 5.968269
range(dims_umap[, 1])
## [1] -5.968269  2.571303
range(dims_umap[, 2])
## [1] -0.4662351  0.5380302
range(dims_umap[, 3])
## [1] -2.433016  5.118238
colnames(dims_umap) <- paste0("UMAP_", seq_len(ncol(dims_umap)))
# check range of PCs
summary(dims_pcs[, 1:5])
##       PC1               PC2               PC3               PC4         
##  Min.   :-21.044   Min.   :-8.1424   Min.   :-6.6266   Min.   :-4.6636  
##  1st Qu.: -1.711   1st Qu.: 0.9596   1st Qu.:-1.4899   1st Qu.:-0.1717  
##  Median :  1.409   Median : 2.2922   Median :-0.2830   Median : 0.8712  
##  Mean   : -1.044   Mean   : 2.2085   Mean   :-0.3011   Mean   : 0.8409  
##  3rd Qu.:  2.830   3rd Qu.: 3.5343   3rd Qu.: 0.9673   3rd Qu.: 1.8674  
##  Max.   :  6.037   Max.   :10.1032   Max.   : 7.1638   Max.   :10.5430  
##       PC5         
##  Min.   :-3.9399  
##  1st Qu.:-1.0913  
##  Median :-0.2810  
##  Mean   :-0.2968  
##  3rd Qu.: 0.4281  
##  Max.   : 6.5935
mean(dims_pcs[, 1])
## [1] -1.044324
sd(dims_pcs[, 1])
## [1] 6.13257
max(abs(dims_pcs))
## [1] 21.04373
range(dims_pcs[, 1])
## [1] -21.043732   6.036751
range(dims_pcs[, 2])
## [1] -8.142405 10.103168
range(dims_pcs[, 3])
## [1] -6.626644  7.163784
# z-scaling for log counts of marker genes
# note: these are not ranked in any order
summary(dims_markers_SpatialDE[, 1:5])
##  ENSG00000100285  ENSG00000183036  ENSG00000115756  ENSG00000163032
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:2.065  
##  Median :0.0000   Median :0.0000   Median :0.4381   Median :2.626  
##  Mean   :0.3494   Mean   :0.5616   Mean   :0.6087   Mean   :2.436  
##  3rd Qu.:0.7051   3rd Qu.:1.0433   3rd Qu.:1.1227   3rd Qu.:3.016  
##  Max.   :3.0817   Max.   :5.4317   Max.   :4.4647   Max.   :5.029  
##  ENSG00000034510
##  Min.   :0.000  
##  1st Qu.:2.704  
##  Median :3.128  
##  Mean   :3.051  
##  3rd Qu.:3.493  
##  Max.   :5.460
dims_markers_SpatialDE <- scale(dims_markers_SpatialDE)

summary(dims_markers_SpatialDE[, 1:5])
##  ENSG00000100285   ENSG00000183036   ENSG00000115756   ENSG00000163032  
##  Min.   :-0.6400   Min.   :-0.8113   Min.   :-0.8722   Min.   :-2.8502  
##  1st Qu.:-0.6400   1st Qu.:-0.8113   1st Qu.:-0.8722   1st Qu.:-0.4334  
##  Median :-0.6400   Median :-0.8113   Median :-0.2444   Median : 0.2228  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6513   3rd Qu.: 0.6959   3rd Qu.: 0.7366   3rd Qu.: 0.6794  
##  Max.   : 5.0038   Max.   : 7.0354   Max.   : 5.5253   Max.   : 3.0352  
##  ENSG00000034510  
##  Min.   :-4.3215  
##  1st Qu.:-0.4922  
##  Median : 0.1079  
##  Mean   : 0.0000  
##  3rd Qu.: 0.6250  
##  Max.   : 3.4111
mean(dims_markers_SpatialDE[, 1])
## [1] 1.665125e-18
sd(dims_markers_SpatialDE[, 1])
## [1] 1
max(abs(dims_markers_SpatialDE))
## [1] 7.035362
# scale spatial dimensions
summary(as.data.frame(dims_spatial))
##     x_coord         y_coord      
##  Min.   :139.3   Min.   :-521.3  
##  1st Qu.:234.3   1st Qu.:-392.2  
##  Median :314.9   Median :-305.2  
##  Mean   :315.4   Mean   :-302.4  
##  3rd Qu.:396.8   3rd Qu.:-212.2  
##  Max.   :497.8   Max.   :-109.7
range(dims_spatial[, 1])
## [1] 139.3339 497.8398
range(dims_spatial[, 2])
## [1] -521.3322 -109.6760
dims_spatial <- apply(as.matrix(dims_spatial), 2, function(col) {
    (col - min(col)) / (max(col) - min(col)) * 20 - 10
})

colnames(dims_spatial) <- c("spatial_x", "spatial_y")

summary(dims_spatial)
##    spatial_x          spatial_y       
##  Min.   :-10.0000   Min.   :-10.0000  
##  1st Qu.: -4.7000   1st Qu.: -3.7280  
##  Median : -0.2034   Median :  0.5018  
##  Mean   : -0.1781   Mean   :  0.6358  
##  3rd Qu.:  4.3610   3rd Qu.:  5.0180  
##  Max.   : 10.0000   Max.   : 10.0000
stopifnot(nrow(dims_spatial) == ncol(sce_151673))
# alternative ranges for additional plots
dims_spatial2 <- apply(as.matrix(dims_spatial), 2, function(col) {
    (col - min(col)) / (max(col) - min(col)) * 10 - 5
})
summary(dims_spatial2)
##    spatial_x          spatial_y      
##  Min.   :-5.00000   Min.   :-5.0000  
##  1st Qu.:-2.34999   1st Qu.:-1.8640  
##  Median :-0.10168   Median : 0.2509  
##  Mean   :-0.08905   Mean   : 0.3179  
##  3rd Qu.: 2.18052   3rd Qu.: 2.5090  
##  Max.   : 5.00000   Max.   : 5.0000
dims_spatial3 <- apply(as.matrix(dims_spatial), 2, function(col) {
    (col - min(col)) / (max(col) - min(col)) * 4 - 2
})
summary(dims_spatial3)
##    spatial_x          spatial_y      
##  Min.   :-2.00000   Min.   :-2.0000  
##  1st Qu.:-0.94000   1st Qu.:-0.7456  
##  Median :-0.04067   Median : 0.1004  
##  Mean   :-0.03562   Mean   : 0.1272  
##  3rd Qu.: 0.87221   3rd Qu.: 1.0036  
##  Max.   : 2.00000   Max.   : 2.0000
dims_spatial4 <- apply(as.matrix(dims_spatial), 2, function(col) {
    (col - min(col)) / (max(col) - min(col)) * 2 - 1
})
summary(dims_spatial4)
##    spatial_x          spatial_y       
##  Min.   :-1.00000   Min.   :-1.00000  
##  1st Qu.:-0.47000   1st Qu.:-0.37280  
##  Median :-0.02034   Median : 0.05018  
##  Mean   :-0.01781   Mean   : 0.06358  
##  3rd Qu.: 0.43610   3rd Qu.: 0.50180  
##  Max.   : 1.00000   Max.   : 1.00000

Graph-based clustering

Run standard Bioconductor graph-based clustering on subset of molecular dimensions and the two scaled spatial dimensions.

Note: graph-based clustering seems better suited than k-means for this dataset, since the layers do not have an ellipsoidal shape in the spatial dimensions. However, for other datasets, e.g. cancer data, k-means could also work.

Each clustering and plot below uses a different combination of molecular feature dimensions and spatial dimensions.

To do: arrange more systematically; e.g. to show impact of choice of range for scaling spatial dimensions (e.g. larger range tends to give more circular clusters, since spatial information dominates molecular information).

To do: also show impact of number of clusters (do clusters split up into more layers, or into more circular type clusters? from preliminary plots it seemed they tend to split into circular clusters first).

Top PCs plus spatial dimensions

Top 10 PCs plus 2 spatial dimensions, with spatial dimensions scaled to range +10 to -10.

# number of PCs
n_pcs <- 10
dims_clus <- cbind(dims_pcs[, seq_len(n_pcs), drop = FALSE], dims_spatial)
head(dims_clus, 2)
##                           PC1        PC2        PC3         PC4       PC5
## AAACAAGTATCTCCCA-1  2.8833511  1.7285837 -0.7824435 -0.01344705 -1.203797
## AAACAATCTACTAGCA-1 -0.6533501 -0.2294782  0.7125808  3.03055063 -0.927210
##                          PC6       PC7       PC8       PC9      PC10 spatial_x
## AAACAAGTATCTCCCA-1 0.6135239 0.8828088 0.5227014 0.2435614 0.8096759  6.808938
## AAACAATCTACTAGCA-1 0.2433001 2.9532478 0.2841878 0.5876416 1.8839005 -3.288978
##                    spatial_y
## AAACAAGTATCTCCCA-1 -3.186837
## AAACAATCTACTAGCA-1  9.190992
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  765 1541  631  230  335   63   49   25
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few PCs plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_PCs_spatial.png"
ggsave(filename, width = 6, height = 6)

Top 10 PCs plus 2 spatial dimensions, with spatial dimensions scaled to range +5 to -5.

# number of PCs
n_pcs <- 10
dims_clus <- cbind(dims_pcs[, seq_len(n_pcs), drop = FALSE], dims_spatial2)
head(dims_clus, 2)
##                           PC1        PC2        PC3         PC4       PC5
## AAACAAGTATCTCCCA-1  2.8833511  1.7285837 -0.7824435 -0.01344705 -1.203797
## AAACAATCTACTAGCA-1 -0.6533501 -0.2294782  0.7125808  3.03055063 -0.927210
##                          PC6       PC7       PC8       PC9      PC10 spatial_x
## AAACAAGTATCTCCCA-1 0.6135239 0.8828088 0.5227014 0.2435614 0.8096759  3.404469
## AAACAATCTACTAGCA-1 0.2433001 2.9532478 0.2841878 0.5876416 1.8839005 -1.644489
##                    spatial_y
## AAACAAGTATCTCCCA-1 -1.593419
## AAACAATCTACTAGCA-1  4.595496
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  714 1625  525  211  301  191   51   21
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few PCs plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_PCs_spatial2.png"
ggsave(filename, width = 6, height = 6)

Top 10 PCs without any spatial dimensions.

# number of PCs
n_pcs <- 50
dims_clus <- cbind(dims_pcs[, seq_len(n_pcs), drop = FALSE])
head(dims_clus, 2)
##                           PC1        PC2        PC3         PC4       PC5
## AAACAAGTATCTCCCA-1  2.8833511  1.7285837 -0.7824435 -0.01344705 -1.203797
## AAACAATCTACTAGCA-1 -0.6533501 -0.2294782  0.7125808  3.03055063 -0.927210
##                          PC6       PC7       PC8       PC9      PC10       PC11
## AAACAAGTATCTCCCA-1 0.6135239 0.8828088 0.5227014 0.2435614 0.8096759 -0.9685017
## AAACAATCTACTAGCA-1 0.2433001 2.9532478 0.2841878 0.5876416 1.8839005 -0.5789290
##                          PC12       PC13       PC14       PC15       PC16
## AAACAAGTATCTCCCA-1 -0.6274381 -1.0434080  0.2780830 -0.2119032  0.5009080
## AAACAATCTACTAGCA-1  0.5885199  0.6924859 -0.8022115 -0.6613575 -0.1727186
##                          PC17       PC18      PC19       PC20       PC21
## AAACAAGTATCTCCCA-1 -0.5707107 0.07855699 1.0147612 -0.3316051  0.8316342
## AAACAATCTACTAGCA-1  0.6628645 1.12373040 0.3277835 -2.7453910 -0.2442325
##                          PC22       PC23        PC24     PC25       PC26
## AAACAAGTATCTCCCA-1 -0.2787166 0.05044994  0.07206099 0.744943 -0.1023643
## AAACAATCTACTAGCA-1  0.4703310 0.46984762 -0.44842142 0.150810  2.2232703
##                         PC27       PC28      PC29        PC30       PC31
## AAACAAGTATCTCCCA-1 0.5012092 -0.9441800 0.4488365 -0.02456148  0.7081185
## AAACAATCTACTAGCA-1 1.6867129  0.4254494 0.8837966  0.92289385 -1.0654584
##                          PC32       PC33      PC34       PC35       PC36
## AAACAAGTATCTCCCA-1  0.1371749 -0.5776098 0.2822312  0.2760997  0.4230168
## AAACAATCTACTAGCA-1 -1.2237264  0.9441805 2.2768502 -0.2787198 -1.5774850
##                          PC37       PC38       PC39       PC40        PC41
## AAACAAGTATCTCCCA-1 -0.3044026 -0.2631594  0.5971620 -0.5140125  0.04289925
## AAACAATCTACTAGCA-1  1.8357906 -1.4310787 -0.4912725  0.4294031 -0.98897441
##                          PC42       PC43       PC44       PC45      PC46
## AAACAAGTATCTCCCA-1 -0.3631198 -0.1577734 0.06156945  0.1750125 0.5602127
## AAACAATCTACTAGCA-1 -0.9369755 -0.8849352 0.62185428 -0.8876011 0.7225443
##                         PC47       PC48      PC49      PC50
## AAACAAGTATCTCCCA-1 0.3171250  0.3519664 0.6937579 0.3279680
## AAACAATCTACTAGCA-1 0.8187684 -1.2965177 2.4247568 0.1097391
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  166  204  684 1199  839  254  152  141
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few PCs")

filename <- "../plots/clustering/clustering_PCs.png"
ggsave(filename, width = 6, height = 6)

Top UMAPs plus spatial dimensions

Top 10 UMAPs plus 2 spatial dimensions, with spatial dimensions scaled to range +10 to -10.

# number of UMAPs
n_umap <- 10
dims_clus <- cbind(dims_umap[, seq_len(n_umap), drop = FALSE], dims_spatial)
head(dims_clus, 2)
##         UMAP_1       UMAP_2     UMAP_3      UMAP_4    UMAP_5      UMAP_6
## [1,] 1.6574038  0.211001493 -1.6190826 -0.04793527 -0.234065 -0.08746051
## [2,] 0.2069426 -0.002405581 -0.0430752  0.08569449 -0.134597 -0.02234639
##          UMAP_7     UMAP_8      UMAP_9     UMAP_10 spatial_x spatial_y
## [1,] -0.6257519 0.37865797 -0.03104449 -0.09756495  6.808938 -3.186837
## [2,] -0.1052133 0.07261529  0.01877433 -0.01385205 -3.288978  9.190992
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  696 1563  450  234  371  274   35   16
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few UMAPs plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_UMAPs_spatial.png"
ggsave(filename, width = 6, height = 6)

Top 10 UMAPs plus 2 spatial dimensions, with spatial dimensions scaled to range +5 to -5.

# number of UMAPs
n_umap <- 10
dims_clus <- cbind(dims_umap[, seq_len(n_umap), drop = FALSE], dims_spatial2)
head(dims_clus, 2)
##         UMAP_1       UMAP_2     UMAP_3      UMAP_4    UMAP_5      UMAP_6
## [1,] 1.6574038  0.211001493 -1.6190826 -0.04793527 -0.234065 -0.08746051
## [2,] 0.2069426 -0.002405581 -0.0430752  0.08569449 -0.134597 -0.02234639
##          UMAP_7     UMAP_8      UMAP_9     UMAP_10 spatial_x spatial_y
## [1,] -0.6257519 0.37865797 -0.03104449 -0.09756495  3.404469 -1.593419
## [2,] -0.1052133 0.07261529  0.01877433 -0.01385205 -1.644489  4.595496
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
## 1316  355  456  862  340  256   35   19
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few UMAPs plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_UMAPs_spatial2.png"
ggsave(filename, width = 6, height = 6)

Top 10 UMAPs without any spatial dimensions.

# number of UMAPs
n_umap <- 10
dims_clus <- cbind(dims_umap[, seq_len(n_umap), drop = FALSE])
head(dims_clus, 2)
##         UMAP_1       UMAP_2     UMAP_3      UMAP_4    UMAP_5      UMAP_6
## [1,] 1.6574038  0.211001493 -1.6190826 -0.04793527 -0.234065 -0.08746051
## [2,] 0.2069426 -0.002405581 -0.0430752  0.08569449 -0.134597 -0.02234639
##          UMAP_7     UMAP_8      UMAP_9     UMAP_10
## [1,] -0.6257519 0.37865797 -0.03104449 -0.09756495
## [2,] -0.1052133 0.07261529  0.01877433 -0.01385205
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
## 2833  272  106   89   92  144   82   21
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on top few UMAPs")

filename <- "../plots/clustering/clustering_UMAPs_spatial3.png"
ggsave(filename, width = 6, height = 6)

SpatialDE marker genes plus spatial dimensions

SpatialDE marker genes plus 2 spatial dimensions, with spatial dimensions scaled to range +10 to -10.

# number of marker genes
ncol(dims_markers_SpatialDE)
## [1] 26
n_markers <- ncol(dims_markers_SpatialDE)
dims_clus <- cbind(dims_markers_SpatialDE[, seq_len(n_markers), drop = FALSE], dims_spatial)
head(dims_clus, 2)
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.2126437      -0.8112561       0.2984179
## AAACAATCTACTAGCA-1      -0.6399546       1.4061376      -0.8721925
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1       0.6295376      -0.4311853        1.139977
## AAACAATCTACTAGCA-1       1.1177109      -1.1183125       -2.510971
##                    ENSG00000171476 ENSG00000138814 ENSG00000150625
## AAACAAGTATCTCCCA-1      -0.4183668        1.374690       0.4316475
## AAACAATCTACTAGCA-1       1.0299402       -1.502121       0.6499384
##                    ENSG00000171617 ENSG00000104722 ENSG00000277586
## AAACAAGTATCTCCCA-1       0.2030549       0.5885505       0.2541944
## AAACAATCTACTAGCA-1       0.6808459       2.3758891       0.6760906
##                    ENSG00000165023 ENSG00000173267 ENSG00000127585
## AAACAAGTATCTCCCA-1       -1.061712       0.8635048      -0.7890941
## AAACAATCTACTAGCA-1       -1.061712      -1.1465808       1.0384008
##                    ENSG00000131095 ENSG00000162545 ENSG00000145824
## AAACAAGTATCTCCCA-1       -1.202939       0.1672421       1.0154551
## AAACAATCTACTAGCA-1        1.359723      -2.7629152      -0.7027357
##                    ENSG00000137267 ENSG00000130558 ENSG00000251562
## AAACAAGTATCTCCCA-1       -1.150362       0.3914375      0.09163511
## AAACAATCTACTAGCA-1        2.060582      -2.7696145     -0.41887645
##                    ENSG00000105290 ENSG00000168314 ENSG00000136541
## AAACAAGTATCTCCCA-1      -0.6207722      -0.2288528      -0.5595312
## AAACAATCTACTAGCA-1      -0.1127079      -0.7050082      -0.5595312
##                    ENSG00000080822 ENSG00000091513 spatial_x spatial_y
## AAACAAGTATCTCCCA-1      -0.8021516      -0.8131082  6.808938 -3.186837
## AAACAATCTACTAGCA-1      -0.8021516      -0.8131082 -3.288978  9.190992
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##   1   2   3   4   5   6   7   8 
## 532 717 540 517 511 271 359 192
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_SpatialDE_spatial.png"
ggsave(filename, width = 6, height = 6)

SpatialDE marker genes plus 2 spatial dimensions, with spatial dimensions scaled to range +5 to -5.

# number of marker genes
ncol(dims_markers_SpatialDE)
## [1] 26
n_markers <- ncol(dims_markers_SpatialDE)
dims_clus <- cbind(dims_markers_SpatialDE[, seq_len(n_markers), drop = FALSE], dims_spatial2)
head(dims_clus, 2)
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.2126437      -0.8112561       0.2984179
## AAACAATCTACTAGCA-1      -0.6399546       1.4061376      -0.8721925
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1       0.6295376      -0.4311853        1.139977
## AAACAATCTACTAGCA-1       1.1177109      -1.1183125       -2.510971
##                    ENSG00000171476 ENSG00000138814 ENSG00000150625
## AAACAAGTATCTCCCA-1      -0.4183668        1.374690       0.4316475
## AAACAATCTACTAGCA-1       1.0299402       -1.502121       0.6499384
##                    ENSG00000171617 ENSG00000104722 ENSG00000277586
## AAACAAGTATCTCCCA-1       0.2030549       0.5885505       0.2541944
## AAACAATCTACTAGCA-1       0.6808459       2.3758891       0.6760906
##                    ENSG00000165023 ENSG00000173267 ENSG00000127585
## AAACAAGTATCTCCCA-1       -1.061712       0.8635048      -0.7890941
## AAACAATCTACTAGCA-1       -1.061712      -1.1465808       1.0384008
##                    ENSG00000131095 ENSG00000162545 ENSG00000145824
## AAACAAGTATCTCCCA-1       -1.202939       0.1672421       1.0154551
## AAACAATCTACTAGCA-1        1.359723      -2.7629152      -0.7027357
##                    ENSG00000137267 ENSG00000130558 ENSG00000251562
## AAACAAGTATCTCCCA-1       -1.150362       0.3914375      0.09163511
## AAACAATCTACTAGCA-1        2.060582      -2.7696145     -0.41887645
##                    ENSG00000105290 ENSG00000168314 ENSG00000136541
## AAACAAGTATCTCCCA-1      -0.6207722      -0.2288528      -0.5595312
## AAACAATCTACTAGCA-1      -0.1127079      -0.7050082      -0.5595312
##                    ENSG00000080822 ENSG00000091513 spatial_x spatial_y
## AAACAAGTATCTCCCA-1      -0.8021516      -0.8131082  3.404469 -1.593419
## AAACAATCTACTAGCA-1      -0.8021516      -0.8131082 -1.644489  4.595496
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##   1   2   3   4   5   6   7   8 
## 425 754 414 413 421 502 313 397
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_SpatialDE_spatial2.png"
ggsave(filename, width = 6, height = 6)

SpatialDE marker genes plus 2 spatial dimensions, with spatial dimensions scaled to range +2 to -2.

# number of marker genes
ncol(dims_markers_SpatialDE)
## [1] 26
n_markers <- ncol(dims_markers_SpatialDE)
dims_clus <- cbind(dims_markers_SpatialDE[, seq_len(n_markers), drop = FALSE], dims_spatial3)
head(dims_clus, 2)
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.2126437      -0.8112561       0.2984179
## AAACAATCTACTAGCA-1      -0.6399546       1.4061376      -0.8721925
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1       0.6295376      -0.4311853        1.139977
## AAACAATCTACTAGCA-1       1.1177109      -1.1183125       -2.510971
##                    ENSG00000171476 ENSG00000138814 ENSG00000150625
## AAACAAGTATCTCCCA-1      -0.4183668        1.374690       0.4316475
## AAACAATCTACTAGCA-1       1.0299402       -1.502121       0.6499384
##                    ENSG00000171617 ENSG00000104722 ENSG00000277586
## AAACAAGTATCTCCCA-1       0.2030549       0.5885505       0.2541944
## AAACAATCTACTAGCA-1       0.6808459       2.3758891       0.6760906
##                    ENSG00000165023 ENSG00000173267 ENSG00000127585
## AAACAAGTATCTCCCA-1       -1.061712       0.8635048      -0.7890941
## AAACAATCTACTAGCA-1       -1.061712      -1.1465808       1.0384008
##                    ENSG00000131095 ENSG00000162545 ENSG00000145824
## AAACAAGTATCTCCCA-1       -1.202939       0.1672421       1.0154551
## AAACAATCTACTAGCA-1        1.359723      -2.7629152      -0.7027357
##                    ENSG00000137267 ENSG00000130558 ENSG00000251562
## AAACAAGTATCTCCCA-1       -1.150362       0.3914375      0.09163511
## AAACAATCTACTAGCA-1        2.060582      -2.7696145     -0.41887645
##                    ENSG00000105290 ENSG00000168314 ENSG00000136541
## AAACAAGTATCTCCCA-1      -0.6207722      -0.2288528      -0.5595312
## AAACAATCTACTAGCA-1      -0.1127079      -0.7050082      -0.5595312
##                    ENSG00000080822 ENSG00000091513  spatial_x  spatial_y
## AAACAAGTATCTCCCA-1      -0.8021516      -0.8131082  1.3617876 -0.6373674
## AAACAATCTACTAGCA-1      -0.8021516      -0.8131082 -0.6577956  1.8381983
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##   1   2   3   4   5   6   7   8 
## 398 812 469 461 299 342 627 231
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_SpatialDE_spatial3.png"
ggsave(filename, width = 6, height = 6)

SpatialDE marker genes plus 2 spatial dimensions, with spatial dimensions scaled to range +1 to -1.

# number of marker genes
ncol(dims_markers_SpatialDE)
## [1] 26
n_markers <- ncol(dims_markers_SpatialDE)
dims_clus <- cbind(dims_markers_SpatialDE[, seq_len(n_markers), drop = FALSE], dims_spatial4)
head(dims_clus, 2)
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.2126437      -0.8112561       0.2984179
## AAACAATCTACTAGCA-1      -0.6399546       1.4061376      -0.8721925
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1       0.6295376      -0.4311853        1.139977
## AAACAATCTACTAGCA-1       1.1177109      -1.1183125       -2.510971
##                    ENSG00000171476 ENSG00000138814 ENSG00000150625
## AAACAAGTATCTCCCA-1      -0.4183668        1.374690       0.4316475
## AAACAATCTACTAGCA-1       1.0299402       -1.502121       0.6499384
##                    ENSG00000171617 ENSG00000104722 ENSG00000277586
## AAACAAGTATCTCCCA-1       0.2030549       0.5885505       0.2541944
## AAACAATCTACTAGCA-1       0.6808459       2.3758891       0.6760906
##                    ENSG00000165023 ENSG00000173267 ENSG00000127585
## AAACAAGTATCTCCCA-1       -1.061712       0.8635048      -0.7890941
## AAACAATCTACTAGCA-1       -1.061712      -1.1465808       1.0384008
##                    ENSG00000131095 ENSG00000162545 ENSG00000145824
## AAACAAGTATCTCCCA-1       -1.202939       0.1672421       1.0154551
## AAACAATCTACTAGCA-1        1.359723      -2.7629152      -0.7027357
##                    ENSG00000137267 ENSG00000130558 ENSG00000251562
## AAACAAGTATCTCCCA-1       -1.150362       0.3914375      0.09163511
## AAACAATCTACTAGCA-1        2.060582      -2.7696145     -0.41887645
##                    ENSG00000105290 ENSG00000168314 ENSG00000136541
## AAACAAGTATCTCCCA-1      -0.6207722      -0.2288528      -0.5595312
## AAACAATCTACTAGCA-1      -0.1127079      -0.7050082      -0.5595312
##                    ENSG00000080822 ENSG00000091513  spatial_x  spatial_y
## AAACAAGTATCTCCCA-1      -0.8021516      -0.8131082  0.6808938 -0.3186837
## AAACAATCTACTAGCA-1      -0.8021516      -0.8131082 -0.3288978  0.9190992
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##   1   2   3   4   5   6   7   8 
## 437 329 413 800 790 706 113  51
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_SpatialDE_spatial4.png"
ggsave(filename, width = 6, height = 6)

SpatialDE marker genes without any spatial dimensions.

# number of marker genes
ncol(dims_markers_SpatialDE)
## [1] 26
n_markers <- ncol(dims_markers_SpatialDE)
dims_clus <- cbind(dims_markers_SpatialDE[, seq_len(n_markers), drop = FALSE])
head(dims_clus, 2)
##                    ENSG00000100285 ENSG00000183036 ENSG00000115756
## AAACAAGTATCTCCCA-1       0.2126437      -0.8112561       0.2984179
## AAACAATCTACTAGCA-1      -0.6399546       1.4061376      -0.8721925
##                    ENSG00000163032 ENSG00000034510 ENSG00000187094
## AAACAAGTATCTCCCA-1       0.6295376      -0.4311853        1.139977
## AAACAATCTACTAGCA-1       1.1177109      -1.1183125       -2.510971
##                    ENSG00000171476 ENSG00000138814 ENSG00000150625
## AAACAAGTATCTCCCA-1      -0.4183668        1.374690       0.4316475
## AAACAATCTACTAGCA-1       1.0299402       -1.502121       0.6499384
##                    ENSG00000171617 ENSG00000104722 ENSG00000277586
## AAACAAGTATCTCCCA-1       0.2030549       0.5885505       0.2541944
## AAACAATCTACTAGCA-1       0.6808459       2.3758891       0.6760906
##                    ENSG00000165023 ENSG00000173267 ENSG00000127585
## AAACAAGTATCTCCCA-1       -1.061712       0.8635048      -0.7890941
## AAACAATCTACTAGCA-1       -1.061712      -1.1465808       1.0384008
##                    ENSG00000131095 ENSG00000162545 ENSG00000145824
## AAACAAGTATCTCCCA-1       -1.202939       0.1672421       1.0154551
## AAACAATCTACTAGCA-1        1.359723      -2.7629152      -0.7027357
##                    ENSG00000137267 ENSG00000130558 ENSG00000251562
## AAACAAGTATCTCCCA-1       -1.150362       0.3914375      0.09163511
## AAACAATCTACTAGCA-1        2.060582      -2.7696145     -0.41887645
##                    ENSG00000105290 ENSG00000168314 ENSG00000136541
## AAACAAGTATCTCCCA-1      -0.6207722      -0.2288528      -0.5595312
## AAACAATCTACTAGCA-1      -0.1127079      -0.7050082      -0.5595312
##                    ENSG00000080822 ENSG00000091513
## AAACAAGTATCTCCCA-1      -0.8021516      -0.8131082
## AAACAATCTACTAGCA-1      -0.8021516      -0.8131082
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##   1   2   3   4   5   6   7   8 
## 946 398 855 282 639 236  47 236
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on SpatialDE markers")

filename <- "../plots/clustering/clustering_SpatialDE.png"
ggsave(filename, width = 6, height = 6)

UMAP performed on SpatialDE marker genes plus spatial dimensions

UMAP performed on SpatialDE marker genes, plus 2 spatial dimensions, with spatial dimensions scaled to range +5 to -5.

# number of marker genes
ncol(dims_umap_markers_SpatialDE)
## [1] 10
dims_clus <- cbind(dims_umap_markers_SpatialDE, dims_spatial2)
head(dims_clus, 2)
##                                                                       
## [1,] -0.9650340 -0.34255901 0.3410293  0.3742282 -0.6976575 -1.2204608
## [2,] -0.8869828  0.06444509 0.3855415 -0.3695716  0.5351947 -0.2057753
##                                                   spatial_x spatial_y
## [1,] 0.3853562 -0.53152415  0.5701943 -0.02610241  3.404469 -1.593419
## [2,] 0.6101937 -0.02825297 -0.1937826 -0.19957890 -1.644489  4.595496
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  165  567 1145  460  487  308  464   43
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on UMAP on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_UMAP_SpatialDE_spatial2.png"
ggsave(filename, width = 6, height = 6)

UMAP performed on SpatialDE marker genes, plus 2 spatial dimensions, with spatial dimensions scaled to range +2 to -2.

# number of marker genes
ncol(dims_umap_markers_SpatialDE)
## [1] 10
dims_clus <- cbind(dims_umap_markers_SpatialDE, dims_spatial3)
head(dims_clus, 2)
##                                                                       
## [1,] -0.9650340 -0.34255901 0.3410293  0.3742282 -0.6976575 -1.2204608
## [2,] -0.8869828  0.06444509 0.3855415 -0.3695716  0.5351947 -0.2057753
##                                                    spatial_x  spatial_y
## [1,] 0.3853562 -0.53152415  0.5701943 -0.02610241  1.3617876 -0.6373674
## [2,] 0.6101937 -0.02825297 -0.1937826 -0.19957890 -0.6577956  1.8381983
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
##  357 1031  931  505  381  259  129   46
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on UMAP on SpatialDE markers plus 2 spatial dims (scaled)")

filename <- "../plots/clustering/clustering_UMAP_SpatialDE_spatial3.png"
ggsave(filename, width = 6, height = 6)

UMAP performed on SpatialDE marker genes without any spatial dimensions.

# number of marker genes
ncol(dims_umap_markers_SpatialDE)
## [1] 10
dims_clus <- dims_umap_markers_SpatialDE
head(dims_clus, 2)
##            [,1]        [,2]      [,3]       [,4]       [,5]       [,6]
## [1,] -0.9650340 -0.34255901 0.3410293  0.3742282 -0.6976575 -1.2204608
## [2,] -0.8869828  0.06444509 0.3855415 -0.3695716  0.5351947 -0.2057753
##           [,7]        [,8]       [,9]       [,10]
## [1,] 0.3853562 -0.53152415  0.5701943 -0.02610241
## [2,] 0.6101937 -0.02825297 -0.1937826 -0.19957890
# clustering: see OSCA book (note transpose; number of clusters)
g <- buildSNNGraph(t(dims_clus), k = 10, d = ncol(dims_clus))
g_walk <- igraph::cluster_walktrap(g)

# select number of clusters
n_clus <- 8
clus <- igraph::cut_at(g_walk, n = n_clus)
table(clus)
## clus
##    1    2    3    4    5    6    7    8 
## 2432  355  130  221  210   89  173   29
stopifnot(length(clus) == ncol(sce_151673))

# plot
d_plot <- cbind(xy_coords, cluster = as.factor(clus))

ggplot(d_plot, aes(x = x_coord, y = y_coord, color = cluster)) + 
    geom_point(size = 2, alpha = 0.5) + 
    coord_fixed() + 
    scale_color_brewer(palette = "Paired") + 
    theme_bw() + 
    ggtitle("Clustering on UMAP on SpatialDE markers")

filename <- "../plots/clustering/clustering_UMAP_SpatialDE.png"
ggsave(filename, width = 6, height = 6)